{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>订单号</th>\n",
       "      <th>产品码</th>\n",
       "      <th>消费日期</th>\n",
       "      <th>产品说明</th>\n",
       "      <th>数量</th>\n",
       "      <th>单价</th>\n",
       "      <th>用户码</th>\n",
       "      <th>城市</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536374</td>\n",
       "      <td>21258</td>\n",
       "      <td>6/1/2020 9:09</td>\n",
       "      <td>五彩玫瑰五支装</td>\n",
       "      <td>32</td>\n",
       "      <td>10.95</td>\n",
       "      <td>15100</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536376</td>\n",
       "      <td>22114</td>\n",
       "      <td>6/1/2020 9:32</td>\n",
       "      <td>茉莉花白色25枝</td>\n",
       "      <td>48</td>\n",
       "      <td>3.45</td>\n",
       "      <td>15291</td>\n",
       "      <td>上海</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536376</td>\n",
       "      <td>21733</td>\n",
       "      <td>6/1/2020 9:32</td>\n",
       "      <td>教师节向日葵3枝尤加利5枝</td>\n",
       "      <td>64</td>\n",
       "      <td>2.55</td>\n",
       "      <td>15291</td>\n",
       "      <td>上海</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536378</td>\n",
       "      <td>22386</td>\n",
       "      <td>6/1/2020 9:37</td>\n",
       "      <td>百合粉色10花苞</td>\n",
       "      <td>10</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14688</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536378</td>\n",
       "      <td>85099C</td>\n",
       "      <td>6/1/2020 9:37</td>\n",
       "      <td>橙黄香槟色康乃馨</td>\n",
       "      <td>10</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14688</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      订单号     产品码           消费日期           产品说明  数量     单价    用户码  城市\n",
       "0  536374   21258  6/1/2020 9:09        五彩玫瑰五支装  32  10.95  15100  北京\n",
       "1  536376   22114  6/1/2020 9:32       茉莉花白色25枝  48   3.45  15291  上海\n",
       "2  536376   21733  6/1/2020 9:32  教师节向日葵3枝尤加利5枝  64   2.55  15291  上海\n",
       "3  536378   22386  6/1/2020 9:37       百合粉色10花苞  10   1.95  14688  北京\n",
       "4  536378  85099C  6/1/2020 9:37       橙黄香槟色康乃馨  10   1.95  14688  北京"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the raw order records and preview the first rows.\n",
    "import pandas as pd  # pandas provides the DataFrame used throughout the notebook\n",
    "orders_csv = '易速鲜花订单记录.csv'  # relative path, resolved against the working directory\n",
    "df_sales = pd.read_csv(orders_csv)\n",
    "df_sales.head(5)  # first five rows"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 整理日期格式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "日期范围（格式转化前）: 1/1/2021 10:11 ~ 9/9/2020 9:20\n",
      "日期范围（格式转化后）: 2020-06-01 09:09:00 ~ 2021-06-09 12:31:00\n"
     ]
    }
   ],
   "source": [
    "# Reload the CSV so this cell starts from the raw data, then parse the date column.\n",
    "df_sales = pd.read_csv('易速鲜花订单记录.csv')\n",
    "raw_dates = df_sales['消费日期']  # still text here, so min/max below compare as strings\n",
    "print('日期范围（格式转化前）: %s ~ %s' % (raw_dates.min(), raw_dates.max()))  # range before conversion\n",
    "df_sales['消费日期'] = pd.to_datetime(raw_dates)  # replace the text column with parsed datetimes\n",
    "parsed_dates = df_sales['消费日期']\n",
    "print('日期范围（格式转化后）: %s ~ %s' % (parsed_dates.min(), parsed_dates.max()))  # chronological range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "日期范围（删除不完整的月份）: 2020-06-01 09:09:00 ~ 2021-05-31 17:39:00\n"
     ]
    }
   ],
   "source": [
    "# Keep only rows dated before 2021-06-01, which drops the trailing partial month.\n",
    "before_june_2021 = df_sales['消费日期'] < '2021-06-01'\n",
    "df_sales = df_sales.loc[before_june_2021]\n",
    "date_lo, date_hi = df_sales['消费日期'].min(), df_sales['消费日期'].max()\n",
    "print('日期范围（删除不完整的月份）: %s ~ %s' % (date_lo, date_hi))  # confirm the remaining range"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 1200x600 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt  # pyplot interface, used for tick labels and display\n",
    "# Distinct order numbers per calendar month ('M' resamples into month-end bins).\n",
    "df_orders_monthly = df_sales.set_index('消费日期')['订单号'].resample('M').nunique()\n",
    "month_ends = df_orders_monthly.index\n",
    "# Plot the bare values against a 0..n-1 integer axis; month labels are attached afterwards.\n",
    "ax = pd.DataFrame(df_orders_monthly.values).plot(grid=True, figsize=(12, 6), legend=False)\n",
    "ax.set(xlabel='月份', ylabel='订单数', title='月度订单数')  # axis labels and figure title\n",
    "# One tick per month, labelled MM.YYYY and rotated 45 degrees.\n",
    "tick_labels = [stamp.strftime('%m.%Y') for stamp in month_ends]\n",
    "plt.xticks(range(len(month_ends)), tick_labels, rotation=45)\n",
    "plt.show()  # render the figure"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据清洗"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop rows that exactly repeat an earlier row, keeping the first occurrence.\n",
    "is_repeat = df_sales.duplicated()  # True for each row identical to a previous one\n",
    "df_sales = df_sales[~is_repeat]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征工程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>订单号</th>\n",
       "      <th>产品码</th>\n",
       "      <th>消费日期</th>\n",
       "      <th>产品说明</th>\n",
       "      <th>数量</th>\n",
       "      <th>单价</th>\n",
       "      <th>用户码</th>\n",
       "      <th>城市</th>\n",
       "      <th>总价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536374</td>\n",
       "      <td>21258</td>\n",
       "      <td>2020-06-01 09:09:00</td>\n",
       "      <td>五彩玫瑰五支装</td>\n",
       "      <td>32</td>\n",
       "      <td>10.95</td>\n",
       "      <td>15100</td>\n",
       "      <td>北京</td>\n",
       "      <td>350.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536376</td>\n",
       "      <td>22114</td>\n",
       "      <td>2020-06-01 09:32:00</td>\n",
       "      <td>茉莉花白色25枝</td>\n",
       "      <td>48</td>\n",
       "      <td>3.45</td>\n",
       "      <td>15291</td>\n",
       "      <td>上海</td>\n",
       "      <td>165.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536376</td>\n",
       "      <td>21733</td>\n",
       "      <td>2020-06-01 09:32:00</td>\n",
       "      <td>教师节向日葵3枝尤加利5枝</td>\n",
       "      <td>64</td>\n",
       "      <td>2.55</td>\n",
       "      <td>15291</td>\n",
       "      <td>上海</td>\n",
       "      <td>163.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536378</td>\n",
       "      <td>22386</td>\n",
       "      <td>2020-06-01 09:37:00</td>\n",
       "      <td>百合粉色10花苞</td>\n",
       "      <td>10</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14688</td>\n",
       "      <td>北京</td>\n",
       "      <td>19.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536378</td>\n",
       "      <td>85099C</td>\n",
       "      <td>2020-06-01 09:37:00</td>\n",
       "      <td>橙黄香槟色康乃馨</td>\n",
       "      <td>10</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14688</td>\n",
       "      <td>北京</td>\n",
       "      <td>19.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      订单号     产品码                消费日期           产品说明  数量     单价    用户码  城市  \\\n",
       "0  536374   21258 2020-06-01 09:09:00        五彩玫瑰五支装  32  10.95  15100  北京   \n",
       "1  536376   22114 2020-06-01 09:32:00       茉莉花白色25枝  48   3.45  15291  上海   \n",
       "2  536376   21733 2020-06-01 09:32:00  教师节向日葵3枝尤加利5枝  64   2.55  15291  上海   \n",
       "3  536378   22386 2020-06-01 09:37:00       百合粉色10花苞  10   1.95  14688  北京   \n",
       "4  536378  85099C 2020-06-01 09:37:00       橙黄香槟色康乃馨  10   1.95  14688  北京   \n",
       "\n",
       "      总价  \n",
       "0  350.4  \n",
       "1  165.6  \n",
       "2  163.2  \n",
       "3   19.5  \n",
       "4   19.5  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row total = quantity x unit price, stored in a new '总价' column.\n",
    "df_sales['总价'] = df_sales['数量'].mul(df_sales['单价'])\n",
    "df_sales.head()  # preview including the new column"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 构建User用户表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>订单号</th>\n",
       "      <th>产品码</th>\n",
       "      <th>消费日期</th>\n",
       "      <th>产品说明</th>\n",
       "      <th>数量</th>\n",
       "      <th>单价</th>\n",
       "      <th>用户码</th>\n",
       "      <th>城市</th>\n",
       "      <th>总价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>536374</td>\n",
       "      <td>21258</td>\n",
       "      <td>2020-06-01 09:09:00</td>\n",
       "      <td>五彩玫瑰五支装</td>\n",
       "      <td>32</td>\n",
       "      <td>10.95</td>\n",
       "      <td>15100</td>\n",
       "      <td>北京</td>\n",
       "      <td>350.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>536376</td>\n",
       "      <td>22114</td>\n",
       "      <td>2020-06-01 09:32:00</td>\n",
       "      <td>茉莉花白色25枝</td>\n",
       "      <td>48</td>\n",
       "      <td>3.45</td>\n",
       "      <td>15291</td>\n",
       "      <td>上海</td>\n",
       "      <td>165.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>536376</td>\n",
       "      <td>21733</td>\n",
       "      <td>2020-06-01 09:32:00</td>\n",
       "      <td>教师节向日葵3枝尤加利5枝</td>\n",
       "      <td>64</td>\n",
       "      <td>2.55</td>\n",
       "      <td>15291</td>\n",
       "      <td>上海</td>\n",
       "      <td>163.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>536378</td>\n",
       "      <td>22386</td>\n",
       "      <td>2020-06-01 09:37:00</td>\n",
       "      <td>百合粉色10花苞</td>\n",
       "      <td>10</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14688</td>\n",
       "      <td>北京</td>\n",
       "      <td>19.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>536378</td>\n",
       "      <td>85099C</td>\n",
       "      <td>2020-06-01 09:37:00</td>\n",
       "      <td>橙黄香槟色康乃馨</td>\n",
       "      <td>10</td>\n",
       "      <td>1.95</td>\n",
       "      <td>14688</td>\n",
       "      <td>北京</td>\n",
       "      <td>19.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14564</th>\n",
       "      <td>545190</td>\n",
       "      <td>22937</td>\n",
       "      <td>2020-08-29 15:32:00</td>\n",
       "      <td>产品说明掩码</td>\n",
       "      <td>6</td>\n",
       "      <td>18.00</td>\n",
       "      <td>15656</td>\n",
       "      <td>苏州</td>\n",
       "      <td>108.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14565</th>\n",
       "      <td>545190</td>\n",
       "      <td>22722</td>\n",
       "      <td>2020-08-29 15:32:00</td>\n",
       "      <td>产品说明掩码</td>\n",
       "      <td>4</td>\n",
       "      <td>39.50</td>\n",
       "      <td>15656</td>\n",
       "      <td>苏州</td>\n",
       "      <td>158.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14566</th>\n",
       "      <td>545190</td>\n",
       "      <td>22457</td>\n",
       "      <td>2020-08-29 15:32:00</td>\n",
       "      <td>产品说明掩码</td>\n",
       "      <td>60</td>\n",
       "      <td>3.00</td>\n",
       "      <td>15656</td>\n",
       "      <td>苏州</td>\n",
       "      <td>180.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14567</th>\n",
       "      <td>545190</td>\n",
       "      <td>22464</td>\n",
       "      <td>2020-08-29 15:32:00</td>\n",
       "      <td>产品说明掩码</td>\n",
       "      <td>12</td>\n",
       "      <td>25.00</td>\n",
       "      <td>15656</td>\n",
       "      <td>苏州</td>\n",
       "      <td>300.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14568</th>\n",
       "      <td>545190</td>\n",
       "      <td>22423</td>\n",
       "      <td>2020-08-29 15:32:00</td>\n",
       "      <td>产品说明掩码</td>\n",
       "      <td>1</td>\n",
       "      <td>12.75</td>\n",
       "      <td>15656</td>\n",
       "      <td>苏州</td>\n",
       "      <td>12.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>14569 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          订单号     产品码                消费日期           产品说明  数量     单价    用户码  \\\n",
       "0      536374   21258 2020-06-01 09:09:00        五彩玫瑰五支装  32  10.95  15100   \n",
       "1      536376   22114 2020-06-01 09:32:00       茉莉花白色25枝  48   3.45  15291   \n",
       "2      536376   21733 2020-06-01 09:32:00  教师节向日葵3枝尤加利5枝  64   2.55  15291   \n",
       "3      536378   22386 2020-06-01 09:37:00       百合粉色10花苞  10   1.95  14688   \n",
       "4      536378  85099C 2020-06-01 09:37:00       橙黄香槟色康乃馨  10   1.95  14688   \n",
       "...       ...     ...                 ...            ...  ..    ...    ...   \n",
       "14564  545190   22937 2020-08-29 15:32:00         产品说明掩码   6  18.00  15656   \n",
       "14565  545190   22722 2020-08-29 15:32:00         产品说明掩码   4  39.50  15656   \n",
       "14566  545190   22457 2020-08-29 15:32:00         产品说明掩码  60   3.00  15656   \n",
       "14567  545190   22464 2020-08-29 15:32:00         产品说明掩码  12  25.00  15656   \n",
       "14568  545190   22423 2020-08-29 15:32:00         产品说明掩码   1  12.75  15656   \n",
       "\n",
       "       城市      总价  \n",
       "0      北京  350.40  \n",
       "1      上海  165.60  \n",
       "2      上海  163.20  \n",
       "3      北京   19.50  \n",
       "4      北京   19.50  \n",
       "...    ..     ...  \n",
       "14564  苏州  108.00  \n",
       "14565  苏州  158.00  \n",
       "14566  苏州  180.00  \n",
       "14567  苏州  300.00  \n",
       "14568  苏州   12.75  \n",
       "\n",
       "[14569 rows x 9 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Feature window: rows dated after 2020-06-01 00:00 and no later than 2020-08-30 00:00.\n",
    "# NOTE(review): the upper bound is midnight at the start of 2020-08-30, so purchases later on 08-30\n",
    "# and on 08-31 fall outside the window. If a full three months is intended, use `< '2020-09-01'` - confirm.\n",
    "in_window = (df_sales.消费日期 > '2020-06-01') & (df_sales.消费日期 <= '2020-08-30')\n",
    "# reset_index returns a new frame; assign it so df_sales_3m really gets a fresh 0..n-1 index.\n",
    "df_sales_3m = df_sales[in_window].reset_index(drop=True)\n",
    "df_sales_3m  # show the windowed subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户码</th>\n",
       "      <th>R值</th>\n",
       "      <th>F值</th>\n",
       "      <th>M值</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15100</td>\n",
       "      <td>45</td>\n",
       "      <td>6</td>\n",
       "      <td>635.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15291</td>\n",
       "      <td>35</td>\n",
       "      <td>35</td>\n",
       "      <td>1329.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>14688</td>\n",
       "      <td>6</td>\n",
       "      <td>85</td>\n",
       "      <td>1472.28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15311</td>\n",
       "      <td>5</td>\n",
       "      <td>715</td>\n",
       "      <td>12711.66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15862</td>\n",
       "      <td>89</td>\n",
       "      <td>64</td>\n",
       "      <td>354.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>15951</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>375.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>14745</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>240.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367</th>\n",
       "      <td>15724</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>103.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>368</th>\n",
       "      <td>15874</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>584.35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>15656</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>920.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>370 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       用户码  R值   F值        M值\n",
       "0    15100  45    6    635.10\n",
       "1    15291  35   35   1329.95\n",
       "2    14688   6   85   1472.28\n",
       "3    15311   5  715  12711.66\n",
       "4    15862  89   64    354.23\n",
       "..     ...  ..  ...       ...\n",
       "365  15951   1   22    375.17\n",
       "366  14745   1    7    240.60\n",
       "367  15724   0    5    103.65\n",
       "368  15874   0    5    584.35\n",
       "369  15656   0   15    920.35\n",
       "\n",
       "[370 rows x 4 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build one row per user with R (recency), F (frequency) and M (monetary) features from df_sales_3m.\n",
    "df_user = pd.DataFrame(df_sales['用户码'].unique())  # every user id present in the full data set\n",
    "df_user.columns = ['用户码']\n",
    "purchases_by_user = df_sales_3m.groupby('用户码')  # one grouping shared by all three features\n",
    "# R: days between the latest purchase date in the window (across all users) and each user's own latest purchase.\n",
    "df_R_value = purchases_by_user.消费日期.max().reset_index()\n",
    "df_R_value.columns = ['用户码','最近购买日期']\n",
    "df_R_value['R值'] = (df_R_value['最近购买日期'].max() - df_R_value['最近购买日期']).dt.days\n",
    "# Default (inner) merge: users with no rows in the window drop out of df_user here.\n",
    "df_user = pd.merge(df_user, df_R_value[['用户码','R值']], on='用户码')\n",
    "# F: number of rows per user.\n",
    "# NOTE(review): this counts data rows, not distinct 订单号 values - confirm that is the intended frequency.\n",
    "df_F_value = purchases_by_user.消费日期.count().reset_index()\n",
    "df_F_value.columns = ['用户码','F值']\n",
    "df_user = pd.merge(df_user, df_F_value[['用户码','F值']], on='用户码')\n",
    "# M: sum of '总价' per user within the window.\n",
    "df_M_value = purchases_by_user.总价.sum().reset_index()\n",
    "df_M_value.columns = ['用户码','M值']\n",
    "df_user = pd.merge(df_user, df_M_value, on='用户码')\n",
    "df_user  # show the per-user feature table"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 构建标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户码</th>\n",
       "      <th>R值</th>\n",
       "      <th>F值</th>\n",
       "      <th>M值</th>\n",
       "      <th>年度LTV</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15100</td>\n",
       "      <td>45</td>\n",
       "      <td>6</td>\n",
       "      <td>635.10</td>\n",
       "      <td>635.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15291</td>\n",
       "      <td>35</td>\n",
       "      <td>35</td>\n",
       "      <td>1329.95</td>\n",
       "      <td>4596.51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>14688</td>\n",
       "      <td>6</td>\n",
       "      <td>85</td>\n",
       "      <td>1472.28</td>\n",
       "      <td>4449.48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15311</td>\n",
       "      <td>5</td>\n",
       "      <td>715</td>\n",
       "      <td>12711.66</td>\n",
       "      <td>58218.04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15862</td>\n",
       "      <td>89</td>\n",
       "      <td>64</td>\n",
       "      <td>354.23</td>\n",
       "      <td>659.73</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>15951</td>\n",
       "      <td>1</td>\n",
       "      <td>22</td>\n",
       "      <td>375.17</td>\n",
       "      <td>375.17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>14745</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>240.60</td>\n",
       "      <td>1167.16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>367</th>\n",
       "      <td>15724</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>103.65</td>\n",
       "      <td>212.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>368</th>\n",
       "      <td>15874</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>584.35</td>\n",
       "      <td>4330.67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>15656</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>920.35</td>\n",
       "      <td>1425.90</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>370 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       用户码  R值   F值        M值     年度LTV\n",
       "0    15100  45    6    635.10    635.10\n",
       "1    15291  35   35   1329.95   4596.51\n",
       "2    14688   6   85   1472.28   4449.48\n",
       "3    15311   5  715  12711.66  58218.04\n",
       "4    15862  89   64    354.23    659.73\n",
       "..     ...  ..  ...       ...       ...\n",
       "365  15951   1   22    375.17    375.17\n",
       "366  14745   1    7    240.60   1167.16\n",
       "367  15724   0    5    103.65    212.30\n",
       "368  15874   0    5    584.35   4330.67\n",
       "369  15656   0   15    920.35   1425.90\n",
       "\n",
       "[370 rows x 5 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label: each user's summed '总价' over all rows of df_sales, joined onto the RFM features.\n",
    "df_user_1y = df_sales.groupby('用户码')['总价'].sum().reset_index()\n",
    "df_user_1y.columns = ['用户码','年度LTV']\n",
    "# Left merge keeps exactly the users already present in df_user.\n",
    "df_LTV = pd.merge(df_user, df_user_1y, on='用户码', how='left')\n",
    "df_LTV  # show features plus label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 创建特征集和标签集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R值</th>\n",
       "      <th>F值</th>\n",
       "      <th>M值</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>45</td>\n",
       "      <td>6</td>\n",
       "      <td>635.10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>35</td>\n",
       "      <td>35</td>\n",
       "      <td>1329.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6</td>\n",
       "      <td>85</td>\n",
       "      <td>1472.28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>715</td>\n",
       "      <td>12711.66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>89</td>\n",
       "      <td>64</td>\n",
       "      <td>354.23</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   R值   F值        M值\n",
       "0  45    6    635.10\n",
       "1  35   35   1329.95\n",
       "2   6   85   1472.28\n",
       "3   5  715  12711.66\n",
       "4  89   64    354.23"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Feature matrix: every column except the user id and the label.\n",
    "non_feature_cols = ['用户码', '年度LTV']\n",
    "X = df_LTV.drop(columns=non_feature_cols)\n",
    "X.head()  # preview the features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      635.10\n",
       "1     4596.51\n",
       "2     4449.48\n",
       "3    58218.04\n",
       "4      659.73\n",
       "Name: 年度LTV, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label vector: the '年度LTV' column of df_LTV.\n",
    "y = df_LTV.loc[:, '年度LTV']\n",
    "y.head()  # preview the labels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 拆分训练集、验证集和测试集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/lvan/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "# Stage 1: 70% of the rows become the training set; the remainder is held back.\n",
    "X_train, X_rem, y_train, y_rem = train_test_split(\n",
    "    X, y, train_size=0.7, random_state=0)\n",
    "# Stage 2: split the held-back rows in half into validation and test sets.\n",
    "X_valid, X_test, y_valid, y_test = train_test_split(\n",
    "    X_rem, y_rem, test_size=0.5, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "# Fit the scaler on the training split only, then apply that same scaling to all three splits.\n",
    "scaler = MinMaxScaler().fit(X_train)\n",
    "X_train, X_valid, X_test = (scaler.transform(part) for part in (X_train, X_valid, X_test))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 选择算法创建模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import Lasso, LinearRegression, Ridge\n",
    "# Three linear regressors, each constructed with the library's default hyperparameters.\n",
    "model_lr, model_lasso, model_ridge = LinearRegression(), Lasso(), Ridge()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/lvan/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/base.py:509: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
      "  linalg.lstsq(X, y)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,\n",
       "   normalize=False, random_state=None, solver='auto', tol=0.001)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit every candidate on the same scaled training data, in the order\n",
    "# linear regression -> Lasso -> Ridge.\n",
    "for candidate_model in (model_lr, model_lasso):\n",
    "    candidate_model.fit(X_train, y_train)\n",
    "# Ridge is fitted last as a bare expression so that the cell still shows the\n",
    "# fitted Ridge estimator as its output.\n",
    "model_ridge.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 评估模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集上的R平方分数-线性回归: 0.6749\n",
      "训练集上的R平方分数-Lasso回归: 0.6748\n",
      "训练集上的R平方分数-Ridge回归: 0.6368\n",
      "测试集上的R平方分数-线性回归: 0.4647\n",
      "测试集上的R平方分数-Lasso回归: 0.4667\n",
      "测试集上的R平方分数-Ridge回归: 0.4988\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import r2_score, median_absolute_error  # scikit-learn evaluation metrics\n",
    "\n",
    "# Print the R^2 score of every fitted model, first on the training split and\n",
    "# then on the validation split. The second group is computed from\n",
    "# X_valid / y_valid, so it is labelled as the validation set; the test split\n",
    "# is not used in this cell.\n",
    "scored_models = [\n",
    "    ('线性回归', model_lr),\n",
    "    ('Lasso回归', model_lasso),\n",
    "    ('Ridge回归', model_ridge),\n",
    "]\n",
    "scored_splits = [\n",
    "    ('训练集', X_train, y_train),\n",
    "    ('验证集', X_valid, y_valid),\n",
    "]\n",
    "for split_label, split_X, split_y in scored_splits:\n",
    "    for model_label, fitted_model in scored_models:\n",
    "        split_r2 = r2_score(split_y, fitted_model.predict(split_X))\n",
    "        print('%s上的R平方分数-%s: %0.4f' % (split_label, model_label, split_r2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 绘图比较"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEMCAYAAAArnKpYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHKpJREFUeJzt3Xt8VOW97/HPj5sRRUVALgklKGgJEhCC4qEKXmoQegKViqCcilZp3VWx2Faqbg4H92lVtKVWdpVaa+Uo8dbdxrtHW+X0nIMQKHe8UAQJcgleCcgl4bf/mOFxgIRMYNbMBL7v12teznrWk5nfmuD6Zj1rrWfM3REREQFokukCREQkeygUREQkUCiIiEigUBARkUChICIigUJBREQChYKINBpmNsTM3jWzVWY26SD9RpqZm1lRfLmFmf3BzJaa2WIzG5zQ9wozW2Jmy83snjRsRlZTKIhIo2BmTYEZwKVAATDGzApq6dcKmAC8ndB8PYC79wK+CdxvZk3MrA0wDbjI3XsCHczsomi3JLspFESksTgbWOXuq919F1AKDK+l313APcCOhLYC4K8A7r4Z+AwoAk4F3nf3yni/14GR0ZTfOCgURKSxyAXWJSxXxNsCM+sLdHb3F/f72cVAiZk1M7OuQD+gM7AKOMPM8s2sGTAi3n7UapbpAkREUsHMmgC/BMbVsvpRoAdQDqwF/h9Q4+6fmtkNwFPAnnj7aWkpOEtZY5v7qG3btp6fn5/pMkQkzaqqqtiwYQPdu3cHYMOGDQB07NgRgJqaGpYuXUrTpk0B2L17N82aNeO0007juOOO2+e13nnnHbp06cKxxx67T3tlZSU7d+4kLy8v6s1JuwULFmxx93b1dnT3RvXo16+fi8jRZ/fu3d61a1dfvXq179y50wsLC33ZsmV19h80aJDPnz/f3d23bdvmVVVV7u7+2muv+XnnnRf6bdq0yd3dP/nkE+/du7e/++67EW5F5gDlnsQ+VsNHItIoNGvWjAcffJDi4mJqamq49tpr6dmzJ5MnT6aoqIiSkpI6f3bz5s0UFxfTpEkTcnNzmTVrVlg3YcIEFi9eDMDkyZM5/fTTI9+WbNboho+Kioq8vLw802WIiDQqZrbA3Yvq66erj0REJNDwkYhktd27d1NRUcGOHTvq7yzk5OSQl5dH8+bND+nnFQoiktUqKipo1aoV+fn5mFmmy8lq7s7HH39MRUUFXbt2PaTXiHT4qL55SsxsnJlVmtmi+OO6KOsRkcZnx44dtGnTRoGQBDOjTZs2h3VUFVkoJDtPCfCUu/eJPx6Jqh6RhnrllVc444wz6NatG3fffXed/Z577jnMjMQLIJYsWcK5555Lz5496dWrFzt27GDr1q306dMnPNq2bcstt9ySjk1p9BQIyTvczyrKI4Vk5yk56qV65wMwePBgzjjjjLAD2rx5c+TbcSSpqanhhz/8IS+//DIrVqxg9uzZrFix4oB+W7du5de//jXnnHNOaKuurmbs2LE89NBDLF++nDfffJPmzZvTqlUrFi1aFB5dunThsssuS+dmidQrylCod56SuJHxaWufNbNa5xwxs/FmVm5m5ZWVlbV1abSi2Pns9cQTT4Qd0CmnnJKW7TlSzJs3j27dunHqqafSokULRo8ezV/+8pcD+v3rv/4rt912Gzk5OaHttddeo7CwkN69ewPQpk2bcJftXu+99x6bN2/mvPPOi3ZDjkAdOsTOLaTq0aFDfqY3Katk+kTz88Bsd99pZt8H/ghcuH8nd58JzITYfQrpLTFaiTsfIOx8Cgr2HWnbu/OZNm1aaKtt5yOpsX79ejp3/upvlLy8PN5+++19+ixcuJB169YxbNiwfX4v7733HmZGcXExlZWVjB49mp/+9Kf7/GxpaSlXXHFF1g6L5Od3YO3aTZkuA4CXX36Zbdu2heVNm9YCqdsNbNq079F3NmvRojmFhb0j
fY8oQ2E9+842mBdvC9z944TFR4B7I6wnK0W587nmmmto2rQpI0eO5M4778zaHVBjtGfPHiZOnMhjjz12wLrq6mr+/ve/M3/+fFq2bMlFF11Ev379uOiir6bpLy0t3eeu2myzdu0msuW+1pUroUePaN+jqJ5buqZMmcncuUtp1iy2y6yurmbAgF61tgENap8yZXzSdZaX727Qdh2KKENhPtA9Pk3temA0cGViBzPr6O4b4oslwMoI62mUDnXn88QTT5Cbm8vWrVsZOXIks2bN4rvf/W76N6CRys3NZd26r0Y/KyoqyM39avRz69atLFu2jMGDBwOwceNGSkpKKCsrIy8vj/PPP5+2bdsCMHToUBYuXBhCYfHixVRXV9OvX7/0bZActtLSn3PSSa0A+OyzrUyfPrvWtrr6Hqw9m0R2TsHdq4EbgVeJ7eyfdvflZjbVzPZOUnJz/CvwFgM3U/uUt0e0hux88vPzmTt3LiUlJZSXl++z82nZsmXY+ex9XYBWrVpx5ZVXMm/evPRuWCPXv39/3n//fT744AN27dpFaWnpPnPrnHjiiWzZsoU1a9awZs0aBgwYQFlZGUVFRRQXF7N06VK2b99OdXU1b7311j7DgbNnz2bMmDGZ2CyRekV6n4K7v+Tup7v7ae7+P+Ntk929LP78Z+7e0917u/sF7v5OlPVkoyh2PtXV1WzZsgWI3Q36wgsvcOaZZ2ZqExulxMnXevTowahRo8Lka2VlZQf92datWzNx4kT69+9Pnz596Nu3L8OGDQvrn376aYWCZK1Mn2g+6h3OzI+JOx8zY+jQoQwbNoxt27ZRXFzM7t27qamp4eKLL+b6669P41YdGYYOHcrQoUP3aZs6dWqtfd988819lseOHcvYsWNr7bt69eqU1CcSBYVCFkj1zue4445jwYIFKa1RJFu0b9+JTZtSd9FE+/adUvZaRwKFgog0Khs3Hni/iKSOps4WEZFARwoiIvU45ZTWfPe7/50mTWJ/R+/Zs4chQ86ttQ1ocHs20TevicgBzCyLbl57mR492ma6jKxQXg5F9d1pB6xcuZIe+93xp29eExGRBlMoiIhIoHMKknEd8vPZtHZtpssQEY6yUOjQIT8+w6JknWwZwJaYLJ48sUP+cDat/Shlr9e+Syc2rtFlrnsdVaGQ6il3JVWydwck2WfT2o9S+kfEpiQC8Pjjz6eqak7K3jObHVWhICKS7R577HkeffR5TjjhuNDWsWMbBg7szQMPPE+nTp0S2jvyu9/9LqXvr1AQETkEzz8/h3/7t0fZtWs3bdqcyBNP3EX79m14660FTJhwPxC7tHfOnJlUVX3JFVfczhdfVFFdXcNvfzuJ8847i9mzX+XnP/8D7s6wYd/gnntuAuCBB26lT58zwnvdckvs9W699VauuuqqhPbUf8e3QkFE5BB84xt9mDv3D5gZjzzyZ+6993Huv/9H3Hff/2LGjNsYOLA3VVXbyclpwcyZ/0Fx8QDuuONaampq2L59Bx99VMltt/2GBQtm0bp1Ky655Cb+/Oc3M71ZCgURkUNRUbGZK664nQ0btrBr1266do0N6wwc2JuJE3/FVVcN4bLLLiAvrz39+xdw7bV3sXt3NSNGDKJPnzP461/LGTy4H+3atQbgqquGMGfOPygs7JbJzdJ9CiIih+Kmm6Zx442Xs3RpKQ8/fDs7duwCYNKkcTzyyJ18+eVOBg68jnfeWcP55/dlzpyZ5Oa2Y9y4/8Hjj7+Y4errplAQkUalfZdOsUtmU/Ro3+XQps7+/PMqcnNPAeCPf3whtP/znxX06tWN2267mv79C3jnnTWsXbuB9u1P5vrrv811141g4cJ3OPvsnrz11kK2bPmMmpoaZs9+lUGD+qbkMzocGj4SkUYlE/cUbN++g7y8r749b+LEK5kyZTyXXz6J1q1P4MILi/jgg9i9E9Onz+ZvfyunSZMm9Ox5Kpde+l8oLX2NadNm0bx5M44/viWPPz6Fjh3bcvfd
N3LBBT8IJ5qHDx/EY489n/btS6RQEBGpx549tX/H+fDhgw5o+81vfnJA29VXf4urr/7WAe1jxhQzZkzx4ReYQgoFEZEs0rr1Cdx++7/TokXz0FZY2I3WrU/g3nv/nWeeeSahvTDl769QEBHJIsOHD6r1CAQgN3dQUlNnHw6daBYRkUChICIigUJBREQCnVMQkUYlP7+YtWs/SdnrdelyMmvWvHrQPk2bnkOvXqdRXV1D166dmDVrKied1IqPPqrk5pvv49ln7zngZwYP/j733TeBoqKClNWaDgoFEWlU1q79JKVfv2FWf8Ace+wxLFr0JABXXz2FGTOe4Y47rqVTp3a1BsLhGjDgGtq2PSksf/DBel58cTrXXHMHXbp0SWj/gBdffJH8/PyUvbdCQUSkAc49txdLlrwPwJo1H/Gtb/2IZcue4ssvd3DNNVNZvPh9vv71fL78cmf4md///i/cc8/jnHTS8fTu3Z1jjmnBgw/+lMrKT/nBD37Bhx9uBGD69FsZOLA3AwacyfTpt4afnz49Fkhnnnkms2fPTmifnvLtUyiIiCSppqaGN96Yz/e+V3LAut/+9jlatsxh5cpnWLLkffr2/W8AfPRRJXfd9XsWLpxFq1bHceGFN9C7d3cAJky4nx/96Eq+8Y0+fPjhRoqLb2LlymcOeO10UiiIiNTjyy930qfPlaxfX0mPHl355jfPOaDPnDn/4OabrwCgsLB7mO103rzlDBrUl5NPPhGAyy+/iPfe+xCA11+fx4oVq8NrfPHFNqqqtke9OQelUBARqcfecwrbt++guPgmZsx4hptvHn3Yr7tnzx7mzv0DOTnHpKDK1NAlqSIiSWrZMocHHvgx99//BNXV1fusO//8s3jyydhVTMuWrWLJklUA9O9fwFtvLeTTT7+gurqa5577a/iZSy4ZwG9+83RYXrTo3TRsxcEpFESkUenS5eRUzpxNly4nN+j9zzrrDAoLuzF79mv7tN9ww0iqqrbTo8flTJ78MP36fR2A3NxTuP32cZx99jgGDryO/PxOnHji8QA88MCPKS9fQWHhGAoKRvHQQ39KzYd0GDR8JCKNSn33FEShqmrOPsvPP/+r8HzZsqcAOPbYHEpLf17rz1955RDGj7+M6upqvv3tnzBiRGxuo7ZtT+Kpp34RUdWHJtJQMLMhwK+BpsAj7n53Hf1GAs8C/d29PMqaRETSbcqUmbz++jx27NjFJZcMYMSIwQft/+GHGxkx4sdh+fPPqxg16pts3LiRESNGJLR/zqhRo1Jaa2ShYGZNgRnAN4EKYL6Zlbn7iv36tQImAG9HVYuISCbdd98tDer/pz9Nq7V92rRpjXqW1LOBVe6+2t13AaXA8Fr63QXcA+yIsBYRabT2pPQO5iOdH+aHFWUo5ALrEpYr4m2BmfUFOrv7Qb/F2szGm1m5mZVXVlamvlIRyVo5Oav4+ONqBUMS3J2PP/6YnJycQ36NjJ1oNrMmwC+BcfX1dfeZwEyAoqIi/dMQOYrk5U2homIKlZXdONovmNyyBVauXHnQPjk5OeTl5R3ye0QZCuuBzgnLefG2vVoBZwJvmhlAB6DMzEp0sllE9mre/FO6dp2Q6TKyQkHB4Q8P1SfK2J0PdDezrmbWAhgNlO1d6e6fu3tbd89393xgLqBAEBHJoMhCwd2rgRuBV4GVwNPuvtzMpprZgbNJiYhIxkV6TsHdXwJe2q9tch19B0dZi4iI1O/oPmsjIiL7UCiIiEigUBARkUChICIigUJBREQChYKIiAQKBRERCRQKIiISKBRERCRQKIiISKBQEBGRQKEgIiKBQkFERAKFgoiIBAoFEREJFAoiIhIoFEREJFAoiIhIoFAQEZFAoSAiIoFCQUREAoWCiIgECgUREQkUCiIiEigUREQkUCiIiEigUBARkUChICIigUJBREQChYKIiAQKBRERCRQKIiISRBoKZjbEzN41s1VmNqmW9T8ws6VmtsjM/m5mBVHWIyIiBxdZ
KJhZU2AGcClQAIypZaf/pLv3cvc+wL3AL6OqR0RE6hflkcLZwCp3X+3uu4BSYHhiB3f/ImHxOMAjrEdEROrRLMLXzgXWJSxXAOfs38nMfghMBFoAF9b2QmY2HhgP8LWvfS3lhYqISEzGTzS7+wx3Pw24Dbizjj4z3b3I3YvatWuX3gJFRI4iUYbCeqBzwnJevK0upcCICOsREZF6RBkK84HuZtbVzFoAo4GyxA5m1j1hcRjwfoT1iIhIPSI7p+Du1WZ2I/Aq0BR41N2Xm9lUoNzdy4AbzexiYDfwKXB1VPWIiEj9ojzRjLu/BLy0X9vkhOcTonx/ERFpmIyfaBYRkeyR1JGCmf0H8EFdq4Fj3P1fUlaViIhkRLLDR83cfWJdK+OhISIijVyyw0f13WmsO5FFRI4AOqcgIiJBssNHHcyspI51BhyfonpERCSDkg2Fn3DwIaKfpaAWERHJsGRD4RZiVx9ZHeuPARakpCIREckYXX0kIiKBrj4SEZFAVx+JiEigq49ERCRoyNVHB3P74RYiIiKZl2wo/ANo5+7/TGw0s0J3X5L6skREJBPqPadgZqOAd4DnzGy5mfVPWP1YVIWJiEj6JXOi+Xagn7v3Aa4BZpnZt+Pr6rpvQUREGqFkho+auvsGAHefZ2YXAC+YWWd0KaqIyBElmSOFrWZ22t6FeEAMBoYDPSOqS0REMiCZI4Ub2C883H2rmQ0BRkVSlYiIZES9oeDui+tYVZPiWkREJMOSufroBDP7mZk9aGaXWMxNwGp0pCAickRJZvhoFvAp8P+B64hdjWTACHdfFGFtIiKSZsmEwqnu3gvAzB4BNgBfc/cdkVYmIiJpl8zVR7v3PnH3GqBCgSAicmRK5kiht5l9EX9uwLHxZQPc3U+IrDoREUmrZK4+apqOQkREJPP0fQoiIhIoFEREJFAoiIhIoFAQEZFAoSAiIoFCQUREAoWCiIgEkYaCmQ0xs3fNbJWZTapl/UQzW2FmS8zsDTPrEmU9IiJycJGFgpk1BWYAlwIFwBgzK9iv2z+AIncvBJ4F7o2qHhERqV+URwpnA6vcfbW77wJKiX1bW+Duf3P37fHFuUBehPWIiEg9ogyFXGBdwnJFvK0u3wNerm2FmY03s3IzK6+srExhiSIikigrTjSb2VigCJhW23p3n+nuRe5e1K5du/QWJyJyFElmltRDtR7onLCcF2/bh5ldDNwBDHL3nRHWIyIi9YjySGE+0N3MuppZC2A0UJbYwczOAh4GStx9c4S1iIhIEiILBXevBm4EXgVWAk+7+3Izm2pmJfFu04DjgWfMbJGZldXxciIikgZRDh/h7i8BL+3XNjnh+cVRvr+IiDRMVpxoFhGR7KBQEBGRQKEgIiKBQkFERAKFgoiIBAoFEREJFAoiIhIoFEREJFAoiIhIoFAQEZFAoSAiIoFCQUREAoWCiIgECgUREQkUCiIiEigUREQkUCiIiEigUBARkUChICIigUJBREQChYKIiAQKBRERCRQKIiISKBRERCRQKIiISKBQEBGRQKEgIiKBQkFERAKFgoiIBAoFEREJFAoiIhIoFEREJIg0FMxsiJm9a2arzGxSLevPN7OFZlZtZt+JshYREalfZKFgZk2BGcClQAEwxswK9uv2ITAOeDKqOkREJHnNInzts4FV7r4awMxKgeHAir0d3H1NfN2eCOsQEZEkRTl8lAusS1iuiLc1mJmNN7NyMyuvrKxMSXEiInKgRnGi2d1nunuRuxe1a9cu0+WIiByxogyF9UDnhOW8eJuIiGSpKENhPtDdzLqaWQtgNFAW4fuJiMhhiiwU3L0auBF4FVgJPO3uy81sqpmVAJhZfzOrAC4HHjaz5VHVIyIi9Yvy6iPc/SXgpf3aJic8n09sWElERLJAozjRLCIi6aFQEBGRQKEgIiKBQkFERAKFgoiIBAoFEREJFAoiIhIoFEREJFAoiIhIoFAQEZFAoSAiIoFCQUREAoWCiIgECgUREQkU
CiIiEigUREQkUCiIiEigUBARkUChICIigUJBREQChYKIiAQKBRERCRQKIiISKBRERCRQKIiISKBQEBGRQKEgIiKBQkFERAKFgoiIBAoFEREJFAoiIhIoFEREJFAoiIhIEGkomNkQM3vXzFaZ2aRa1h9jZk/F179tZvlR1iMiIgcXWSiYWVNgBnApUACMMbOC/bp9D/jU3bsBvwLuiaoeERGpX5RHCmcDq9x9tbvvAkqB4fv1GQ78Mf78WeAiM7MIaxIRkYNoFuFr5wLrEpYrgHPq6uPu1Wb2OdAG2JLYyczGA+Pji1Vm9u6hl5VU5rTdv4YsdGTVmJm/BY6szzDFkvyV6DNMjaRrPIy/m7sk0ynKUEgZd58JzEzX+5lZubsXpev9DoVqPHzZXh9kf43ZXh+oxoaKcvhoPdA5YTkv3lZrHzNrBpwIfBxhTSIichBRhsJ8oLuZdTWzFsBooGy/PmXA1fHn3wH+6u4eYU0iInIQkQ0fxc8R3Ai8CjQFHnX35WY2FSh39zLg98AsM1sFfEIsOLJB2oaqDoNqPHzZXh9kf43ZXh+oxgYx/WEuIiJ76Y5mEREJFAoiIhIoFAAzO9nM/reZvR//b+s6+r1iZp+Z2QtprC2rpwpJor7zzWyhmVWb2XfSWVsDapxoZivMbImZvWFmSV3PneYaf2BmS81skZn9vZbZATJaX0K/kWbmZpb2yyuT+AzHmVll/DNcZGbXZVN98T6j4v8Wl5vZk+msL3D3o/4B3AtMij+fBNxTR7+LgP8KvJCmupoC/wROBVoAi4GC/fr8C/BQ/Plo4Kk0fm7J1JcPFAKPA9/JwO82mRovAFrGn9+Qzs+wATWekPC8BHglm+qL92sFzAHmAkVZ+BmOAx5M97/BBtTXHfgH0Dq+fEomatWRQkzidBt/BEbU1snd3wC2pqsosn+qkHrrc/c17r4E2JOmmvaXTI1/c/ft8cW5xO6pybYav0hYPA5I5xUiyfw7BLiL2PxlO9JY217J1pgpydR3PTDD3T8FcPfNaa4R0PDRXu3dfUP8+UagfSaLSVDbVCG5dfVx92pg71Qh6ZBMfZnW0Bq/B7wcaUUHSqpGM/uhmf2T2JHtzWmqDZKoz8z6Ap3d/cU01pUo2d/zyPgw4bNm1rmW9VFJpr7TgdPN7P+a2VwzG5K26hI0imkuUsHMXgc61LLqjsQFd3cz03W6RyEzGwsUAYMyXUtt3H0GMMPMrgTu5KsbPzPKzJoAvyQ2PJPNngdmu/tOM/s+sSPsCzNcU6JmxIaQBhM7Wp1jZr3c/bN0F3FUcPeL61pnZpvMrKO7bzCzjkBGDttq0ZCpQioyMFVIMvVlWlI1mtnFxP5AGOTuO9NU214N/RxLgd9GWtG+6quvFXAm8GZ85LIDUGZmJe5eniU14u6J/188QuyIK12S+R1XAG+7+27gAzN7j1hIzE9PiTEaPopJnG7jauAvGawlUbZPFZJMfZlWb41mdhbwMFCSoXHcZGrsnrA4DHg/W+pz98/dva2757t7PrHzMukMhHprBIj/wbdXCbAym+oD/kzsKAEza0tsOGl1GmuMycTZ7Wx7EBuDf4PY/2ivAyfH24uARxL6/R+gEviSWKoXp6G2ocB7xK5cuCPeNpXY/3QAOcAzwCpgHnBqmj+7+urrH/+sthE7glmegd9vfTW+DmwCFsUfZVlY46+B5fH6/gb0zKb69uv7Jmm++ijJz/AX8c9wcfwz/HqW1WfEhuFWAEuB0en+DN1d01yIiMhXNHwkIiKBQkFERAKFgoiIBAoFEREJFAoiIhIoFEREJDhq7mgWOVxmNgUYAFTHm5oRu1GrtjZqa3f3KemoVeRQKRREGma0x+eiMbOTgFvqaKurr0hW0/CRiIgECgUREQkUCiIiEigUREQkUCiIiEigUBARkUCXpIokbzPwuJntiS83AV6po42DtItkLX2fgoiIBBo+EhGRQKEgIiKBQkFERAKFgoiIBAoFEREJ/hPy5mHD
NSjSuAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "width = 0.25  # bar width, in x-axis data units\n",
    "\n",
    "# Validation-set R^2 of each fitted model. Each value is kept in a\n",
    "# one-element list so that every model contributes one bar to a single group.\n",
    "bars1 = [r2_score(y_valid, model_lr.predict(X_valid))]\n",
    "bars2 = [r2_score(y_valid, model_lasso.predict(X_valid))]\n",
    "bars3 = [r2_score(y_valid, model_ridge.predict(X_valid))]\n",
    "\n",
    "# Bar centres: each series sits one bar width to the right of the previous one.\n",
    "r1 = np.arange(len(bars1))\n",
    "r2 = r1 + width\n",
    "r3 = r1 + 2 * width\n",
    "\n",
    "\n",
    "def addlabels(x, y, axes=None):\n",
    "    '''Write each bar's height, rounded to 3 decimals, just above the bar.\n",
    "\n",
    "    x    -- sequence of bar centre positions on the x-axis\n",
    "    y    -- sequence of bar heights, same length as x\n",
    "    axes -- Axes to draw on; defaults to the current pyplot Axes\n",
    "    '''\n",
    "    target_axes = plt.gca() if axes is None else axes\n",
    "    for bar_x, bar_height in zip(x, y):\n",
    "        shown_value = round(bar_height, 3)\n",
    "        # Centre the text on the bar and lift it slightly above the top edge.\n",
    "        target_axes.text(bar_x, shown_value + 0.02, shown_value, ha='center')\n",
    "\n",
    "\n",
    "# One bar (plus its value label) per model, all drawn on the same Axes.\n",
    "bar_series = [\n",
    "    (r1, bars1, 'blue', '线性回归'),\n",
    "    (r2, bars2, 'cyan', 'Lasso回归'),\n",
    "    (r3, bars3, 'yellow', 'Ridge回归'),\n",
    "]\n",
    "for bar_positions, bar_heights, bar_color, bar_label in bar_series:\n",
    "    ax.bar(bar_positions, bar_heights, width=width, color=bar_color,\n",
    "           edgecolor='black', label=bar_label)\n",
    "    addlabels(bar_positions, bar_heights, axes=ax)\n",
    "\n",
    "# Layout\n",
    "ax.set_ylabel('R2分数')\n",
    "ax.set_xlabel('验证集')\n",
    "ax.set_xticks([])  # bar positions are layout offsets, not data values\n",
    "ax.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
